home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Language/OS - Multiplatform Resource Library
/
LANGUAGE OS.iso
/
ast_comp
/
cpp-kit.lha
/
c++kit
/
Scan.C
< prev
next >
Wrap
C/C++ Source or Header
|
1993-04-11
|
5KB
|
260 lines
/* 9th April, 1993 Mayan Moudgill
* Basically is given a file. If asked it will try and scan one of the
* following tokens from the file.
* strings: "([^"\\\n]|(\.))*"
* identifier: [A-Za-z_][A-Za-z_0-9]*
* integer: [-+]?[0-9]+
* character: .
* Each of these returns a Token.
* It can also accept a mark, which returns a Mark, and a reject which
* when given a Mark, rolls back the state to the Mark, and starts matching
* from that point onwards. Also, for convenience it can rollback the
* last token. Also, it can match a character and a character-string.
* (these are equivalent to trying to scan a character/string,
* and rolling back if the character/string does not exactly match the
* argument).
*
* The other facility that the function provides is that it keeps track
* of the number of lines read in.
*
* I can think of at least two ways of implementing the tokenizing
* and rollback mechanisms: reading files, and mmap'ing them. This
* implementation uses mmap.
*/
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/fcntl.h>
#include <sys/stat.h>
#include <ctype.h>
#include "Scan.H"
/*
 * Default constructor: no file is attached.  Setting _close to 1 makes
 * the destructor skip its munmap/close step and makes eof() report true.
 */
Scan::Scan()
{
    _close = 1;
}
/*
 * Open `name` read-only and map its whole contents into memory.
 *
 * name: path of the file to scan; at most 255 characters of it are kept
 *       in _name.
 *
 * On success _close is 0 and _file/_end bracket the mapping, with the
 * current and previous positions (_at/_oat) at its start.
 *
 * On any failure (open, fstat or mmap) a message is written to cerr, the
 * descriptor is released, and the object is left "closed": _close == 1,
 * _eof == 1 and all position pointers null, so eof() reports true.
 *
 * An empty file is not an error, but there is nothing to map (POSIX mmap
 * rejects a zero length), so it is also left in the closed, at-eof state
 * without printing anything.
 */
Scan::Scan(char * name)
{
    strncpy(_name, name, 255);
    _name[255] = 0;

    /* Start from a fully defined "nothing to scan" state so that every
     * early return below leaves no member indeterminate. */
    _close = 1;
    _eof = 1;
    _size = 0;
    _file = 0;
    _end = 0;
    _oat = _at = 0;
    _onl = _nl = 0;

    _fd = open(name, O_RDONLY);
    if( _fd < 0 ) {
        cerr << "could not open " << name << " cause: " << errno << endl;
        return;
    }

    struct stat buf;
    if( fstat( _fd, &buf) < 0 ) {
        cerr << "could not stat " << name << " cause: " << errno << endl;
        close( _fd);
        _fd = -1;
        return;
    }
    _size = buf.st_size;

    if( _size == 0 ) {
        /* Nothing to map; release the descriptor and stay at eof. */
        close( _fd);
        _fd = -1;
        return;
    }

    /* Compare against MAP_FAILED as a pointer: casting the result to int
     * truncates it where pointers are wider than int. */
    void * map = mmap(0, (size_t)_size, PROT_READ, MAP_SHARED, _fd, 0);
    if( map == MAP_FAILED ) {
        cerr << "could not mmap " << name << " cause: " << errno << endl;
        /* The destructor only closes _fd when _close == 0, so the
         * descriptor has to be released here or it leaks. */
        close( _fd);
        _fd = -1;
        _size = 0;
        return;
    }

    _file = (char *)map;
    _end = _file + _size;
    _oat = _at = _file;
    _close = 0;
    _eof = 0;
}
/*
 * Release the mapping and the file descriptor, unless _close says there
 * is nothing to release.  _close is set afterwards so the object reads
 * as closed.
 */
Scan::~Scan()
{
    if( _close ) {
        return;
    }
    munmap( _file, (int)_size);
    close( _fd);
    _close = 1;
}
/*
 * Begin a scan attempt and skip leading whitespace.
 *
 * Returns 1 immediately if _start() reports non-zero.  Otherwise it
 * reads characters while isspace() holds, calls _backup() after the
 * first character that is not whitespace, records the resulting
 * position and line count in _wat/_wnl, and returns _eof.
 */
int Scan::_space()
{
    if( _start() ) {
        return 1;
    }
    for( ;; ) {
        if( !isspace(_get()) ) {
            break;
        }
    }
    _backup();
    /* Remember where the token proper begins. */
    _wat = _tat;
    _wnl = _tnl;
    return _eof;
}
/*
 * Scan an integer: [-+]?[0-9]+ after optional leading whitespace.
 *
 * result: receives Token(_wat, _tat) for the matched text on success,
 *         and Token(0,0) on every failure path.
 *
 * Returns 1 when a number was matched and committed, 0 otherwise.
 */
int Scan::number(Token& result)
{
    int in;
    if( _space() ) {
        result = Token(0,0);
        return 0;
    }
    in = _get();
    /* Optional sign. */
    if( in == '-' || in == '+' ) {
        in = _get();
    }
    if( !isdigit(in) ) {
        /* Reset result here as well, so that callers see the same
         * empty token on this failure as on all the others. */
        result = Token(0,0);
        return 0;
    }
    do {
        in = _get();
    } while( isdigit(in));
    /* The loop read one character past the last digit. */
    _backup();
    result = Token(_wat, _tat);
    _commit();
    return 1;
}
/*
 * Scan an identifier after optional leading whitespace: a letter or
 * underscore followed by any run of letters, digits and underscores.
 *
 * result: receives Token(_wat, _tat) on success, Token(0,0) on failure.
 *
 * Returns 1 when an identifier was matched and committed, 0 otherwise.
 */
int Scan::identifier(Token& result)
{
    if( _space() ) {
        result = Token(0,0);
        return 0;
    }

    int ch = _get();
    if( !(ch == '_' || isalpha(ch)) ) {
        result = Token(0, 0);
        return 0;
    }

    /* Consume the remaining identifier characters; the loop stops on
     * the first character that does not belong. */
    for( ch = _get(); ch == '_' || isdigit(ch) || isalpha(ch); ch = _get() ) {
    }
    _backup();

    result = Token(_wat, _tat);
    _commit();
    return 1;
}
/*
 * Scan a double-quoted string after optional leading whitespace.  A
 * backslash escapes the single character that follows it, so \" does
 * not end the string.
 *
 * result: receives Token(_wat, _tat), which spans the text including
 *         both quotes, on success; Token(0,0) on failure.
 *
 * Returns 1 when a complete string was matched and committed, 0
 * otherwise.
 */
int Scan::string(Token& result )
{
    int in;
    if( _space() ) {
        result = Token(0,0);
        return 0;
    }
    in = _get();
    if( in != '"' ) {
        result = Token(0,0);
        return 0;
    }
    for( ;; ) {
        in = _get();
        if( in == '"' ) {
            break;
        }
        /* NOTE(review): this relies on _get() raising _eof once the
         * input is exhausted -- confirm against _get().  Without an
         * exit here an unterminated string never leaves the loop. */
        if( _eof ) {
            result = Token(0,0);
            return 0;
        }
        if( in == '\\' ) {
            /* Swallow exactly the escaped character.  Reading one more
             * character here would hide a directly following backslash
             * from the test above, so its escaped character (possibly a
             * quote) would be treated as ordinary text. */
            _get();
        }
    }
    result = Token(_wat, _tat);
    _commit();
    return 1;
}
/*
 * Scan either a quoted string or an identifier, chosen by peeking at
 * the first character after leading whitespace.
 *
 * For a string, result is narrowed to exclude the two quotes: it starts
 * one character in and is two characters shorter.  For anything else
 * the work is delegated to identifier().
 *
 * Returns 1 on a match, 0 otherwise.
 */
int Scan::token(Token& result)
{
    if( _space() ) {
        result = Token(0,0);
        return 0;
    }

    /* Peek: read one character, then un-read it. */
    int peek = _get();
    _backup();

    if( peek != '"' ) {
        return identifier(result);
    }
    if( !string(result) ) {
        return 0;
    }
    /* Strip the surrounding quotes. */
    result = Token( result.string()+1, result.length() - 2);
    return 1;
}
/*
 * Scan exactly one character.  This uses _start() rather than _space(),
 * so the whitespace-skipping loop in _space() is not run.
 *
 * result: receives Token(_wat, 1) on success, Token(0, 0) when _start()
 *         reports non-zero.
 *
 * Returns 1 on success, 0 otherwise.
 */
int Scan::character(Token& result)
{
    int ok = !_start();
    if( ok ) {
        _get();
        result = Token(_wat, 1);
        _commit();
    }
    else {
        result = Token(0, 0);
    }
    return ok;
}
/*
 * Match one specific character.  Like character(), this starts with
 * _start() and not _space().
 *
 * c:      the character the next input character must equal.
 * result: receives Token(_wat, 1) on a match, Token(0,0) otherwise.
 *
 * Returns 1 when the character matched and was committed, 0 otherwise.
 */
int Scan::match(char c, Token& result)
{
    /* _get() is only reached when _start() returned zero. */
    if( _start() || _get() != c ) {
        result = Token(0,0);
        return 0;
    }
    _commit();
    result = Token(_wat, 1);
    return 1;
}
/*
 * Match a literal NUL-terminated string after optional leading
 * whitespace.
 *
 * s:      the text that must appear next in the input.
 * result: receives Token(_wat, _tat) when all of s matched, Token(0,0)
 *         otherwise.
 *
 * Returns 1 when the whole string matched and was committed, 0
 * otherwise.
 */
int Scan::match(char * s, Token& result)
{
    if( _space() ) {
        result = Token(0,0);
        return 0;
    }

    /* Walk s in step with the input; stop at the first mismatch. */
    for( ; *s != 0; s++ ) {
        if( _get() != *s ) {
            break;
        }
    }

    /* s only reaches its terminator if every character matched. */
    if( *s != 0 ) {
        result = Token(0,0);
        return 0;
    }
    _commit();
    result = Token(_wat, _tat);
    return 1;
}
/*
 * Report whether there is nothing left to scan: returns 1 when the
 * scanner is closed (_close set) or when the current position _at has
 * reached _end, and 0 otherwise.
 */
int Scan::eof()
{
    if( _close ) {
        return 1;
    }
    return _at == _end;
}